import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
data = sns.load_dataset("titanic")

# --- Exploratory data analysis -------------------------------------------
# Quick structural overview: shape, dtypes, missing values, summary stats,
# and pairwise correlations of the numeric columns.
SEP = "-" * 32

print(SEP)
display(data.shape)
print(SEP)
display(data.info())
print(SEP)
# Compute the null counts once and only report columns that actually
# contain missing values.
null_counts = data.isnull().sum()
display(null_counts[null_counts > 0])
print(SEP)
display(data.describe(include="all"))
print(SEP)
# numeric_only=True: silently dropping non-numeric columns in
# DataFrame.corr is deprecated (see the FutureWarning in the output
# below); newer pandas raises on the object/category columns without it.
display(data.corr(numeric_only=True).style.background_gradient(cmap='coolwarm'))
--------------------------------
(891, 15)
-------------------------------- <class 'pandas.core.frame.DataFrame'> RangeIndex: 891 entries, 0 to 890 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 survived 891 non-null int64 1 pclass 891 non-null int64 2 sex 891 non-null object 3 age 714 non-null float64 4 sibsp 891 non-null int64 5 parch 891 non-null int64 6 fare 891 non-null float64 7 embarked 889 non-null object 8 class 891 non-null category 9 who 891 non-null object 10 adult_male 891 non-null bool 11 deck 203 non-null category 12 embark_town 889 non-null object 13 alive 891 non-null object 14 alone 891 non-null bool dtypes: bool(2), category(2), float64(2), int64(4), object(5) memory usage: 80.7+ KB
None
--------------------------------
age 177 embarked 2 deck 688 embark_town 2 dtype: int64
--------------------------------
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 891.000000 | 891.000000 | 891 | 714.000000 | 891.000000 | 891.000000 | 891.000000 | 889 | 891 | 891 | 891 | 203 | 889 | 891 | 891 |
| unique | NaN | NaN | 2 | NaN | NaN | NaN | NaN | 3 | 3 | 3 | 2 | 7 | 3 | 2 | 2 |
| top | NaN | NaN | male | NaN | NaN | NaN | NaN | S | Third | man | True | C | Southampton | no | True |
| freq | NaN | NaN | 577 | NaN | NaN | NaN | NaN | 644 | 491 | 537 | 537 | 59 | 644 | 549 | 537 |
| mean | 0.383838 | 2.308642 | NaN | 29.699118 | 0.523008 | 0.381594 | 32.204208 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| std | 0.486592 | 0.836071 | NaN | 14.526497 | 1.102743 | 0.806057 | 49.693429 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| min | 0.000000 | 1.000000 | NaN | 0.420000 | 0.000000 | 0.000000 | 0.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 25% | 0.000000 | 2.000000 | NaN | 20.125000 | 0.000000 | 0.000000 | 7.910400 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 50% | 0.000000 | 3.000000 | NaN | 28.000000 | 0.000000 | 0.000000 | 14.454200 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 75% | 1.000000 | 3.000000 | NaN | 38.000000 | 1.000000 | 0.000000 | 31.000000 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| max | 1.000000 | 3.000000 | NaN | 80.000000 | 8.000000 | 6.000000 | 512.329200 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
--------------------------------
C:\Users\User\AppData\Local\Temp\ipykernel_28720\1412386682.py:11: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. display(data.corr().style.background_gradient(cmap='coolwarm'))
| survived | pclass | age | sibsp | parch | fare | adult_male | alone | |
|---|---|---|---|---|---|---|---|---|
| survived | 1.000000 | -0.338481 | -0.077221 | -0.035322 | 0.081629 | 0.257307 | -0.557080 | -0.203367 |
| pclass | -0.338481 | 1.000000 | -0.369226 | 0.083081 | 0.018443 | -0.549500 | 0.094035 | 0.135207 |
| age | -0.077221 | -0.369226 | 1.000000 | -0.308247 | -0.189119 | 0.096067 | 0.280328 | 0.198270 |
| sibsp | -0.035322 | 0.083081 | -0.308247 | 1.000000 | 0.414838 | 0.159651 | -0.253586 | -0.584471 |
| parch | 0.081629 | 0.018443 | -0.189119 | 0.414838 | 1.000000 | 0.216225 | -0.349943 | -0.583398 |
| fare | 0.257307 | -0.549500 | 0.096067 | 0.159651 | 0.216225 | 1.000000 | -0.182024 | -0.271832 |
| adult_male | -0.557080 | 0.094035 | 0.280328 | -0.253586 | -0.349943 | -0.182024 | 1.000000 | 0.404744 |
| alone | -0.203367 | 0.135207 | 0.198270 | -0.584471 | -0.583398 | -0.271832 | 0.404744 | 1.000000 |
import plotly.express as px

# One bar chart per feature against the target so we can eyeball how
# survival relates to every other column.
feature_cols = [col for col in data.columns if col != 'survived']
for col in feature_cols:
    chart = px.bar(data, x='survived', y=col)
    chart.show()
data.head(3)
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
| 1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
| 2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
# Drop columns that are unusable or leak the target:
#  - "deck": 688/891 values missing (see null counts above) — too sparse
#  - "alive": a yes/no duplicate of the "survived" target
data.drop(["deck", "alive"], axis=1, inplace=True)

# Re-check what is still missing after the drop (compute the counts once).
null_counts = data.isna().sum()
display(null_counts[null_counts > 0])

# Impute the remaining gaps with each column's mode in a single pass,
# using column-bracket access rather than attribute assignment.
# NOTE(review): mode is fine for the categorical columns, but for the
# continuous "age" the median is usually the better choice — kept as mode
# to preserve the original results.
data.fillna(
    {
        "age": data["age"].mode()[0],
        "embarked": data["embarked"].mode()[0],
        "embark_town": data["embark_town"].mode()[0],
    },
    inplace=True,
)
data.head(2)
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | embark_town | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | Southampton | False |
| 1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | Cherbourg | False |
# Encode every remaining non-numeric column as small integer codes so the
# frame can be fed to the network.  The code assignments reproduce the
# original notebook's mappings exactly.
encodings = {
    "sex": {"male": 0, "female": 1},
    "embarked": {"S": 0, "C": 1, "Q": 2},
    "class": {'Third': 0, 'First': 1, 'Second': 2},
    "who": {"man": 0, "woman": 1, "child": 2},
    "adult_male": {True: 0, False: 1},
    "embark_town": {'Southampton': 0, 'Cherbourg': 1, 'Queenstown': 2},
    "alone": {True: 0, False: 1},
}
for column, mapping in encodings.items():
    data[column] = data[column].map(mapping)

# Cast the remaining floats down to plain ints (fare is rounded first).
data["age"] = data["age"].astype(int)
data["fare"] = data["fare"].round().astype(int)
data["class"] = data["class"].astype(int)
data.head()
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | embark_town | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | 0 | 22 | 1 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 1 | 1 | 1 | 38 | 1 | 0 | 71 | 1 | 1 | 1 | 1 | 1 | 1 |
| 2 | 1 | 3 | 1 | 26 | 0 | 0 | 8 | 0 | 0 | 1 | 1 | 0 | 0 |
| 3 | 1 | 1 | 1 | 35 | 1 | 0 | 53 | 0 | 1 | 1 | 1 | 0 | 1 |
| 4 | 0 | 3 | 0 | 35 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 0 |
# Separate the feature matrix from the target, then min-max scale every
# feature column into [0, 1].
# NOTE(review): the scaler statistics are taken from the FULL dataset
# before the train/test split below, which leaks test-set information
# into training — kept as-is to preserve the original results.
y = data['survived']
X = data.drop(["survived"], axis=1)
col_min = X.min()
col_max = X.max()
X = (X - col_min) / (col_max - col_min)
X
| pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | embark_town | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 0.2750 | 0.125 | 0.000000 | 0.013672 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 1 | 0.0 | 1.0 | 0.4750 | 0.125 | 0.000000 | 0.138672 | 0.5 | 0.5 | 0.5 | 1.0 | 0.5 | 1.0 |
| 2 | 1.0 | 1.0 | 0.3250 | 0.000 | 0.000000 | 0.015625 | 0.0 | 0.0 | 0.5 | 1.0 | 0.0 | 0.0 |
| 3 | 0.0 | 1.0 | 0.4375 | 0.125 | 0.000000 | 0.103516 | 0.0 | 0.5 | 0.5 | 1.0 | 0.0 | 1.0 |
| 4 | 1.0 | 0.0 | 0.4375 | 0.000 | 0.000000 | 0.015625 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 0.5 | 0.0 | 0.3375 | 0.000 | 0.000000 | 0.025391 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 887 | 0.0 | 1.0 | 0.2375 | 0.000 | 0.000000 | 0.058594 | 0.0 | 0.5 | 0.5 | 1.0 | 0.0 | 0.0 |
| 888 | 1.0 | 1.0 | 0.3000 | 0.125 | 0.333333 | 0.044922 | 0.0 | 0.0 | 0.5 | 1.0 | 0.0 | 1.0 |
| 889 | 0.0 | 0.0 | 0.3250 | 0.000 | 0.000000 | 0.058594 | 0.5 | 0.5 | 0.0 | 0.0 | 0.5 | 0.0 |
| 890 | 1.0 | 0.0 | 0.4000 | 0.000 | 0.000000 | 0.015625 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
891 rows × 12 columns
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the split — and every metric reported
# below — is reproducible across runs; stratify keeps the survived/died
# class balance identical in the train and test partitions (the target
# is imbalanced: ~38% survived).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)
import tensorflow as tf
from tensorflow import keras

# Fully-connected binary classifier: the scaled feature vector in, a
# survival probability out.  The original Flatten layer was a no-op on
# already-flat (n_features,) rows and has been dropped.
model = keras.Sequential([
    keras.layers.Dense(128, activation=tf.nn.relu,
                       input_shape=(len(X.columns),)),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(256, activation=tf.nn.relu),
    # Single sigmoid unit -> P(survived).
    keras.layers.Dense(1, activation=tf.nn.sigmoid),
])
model.compile(optimizer="adam", loss="binary_crossentropy",
              metrics=['accuracy'])

# batch_size=1 (pure SGD) was ~32x slower per epoch and gives very noisy
# gradient estimates; a standard mini-batch trains faster and more stably.
model.fit(X_train, y_train, epochs=50, batch_size=32)
test_loss, test_acc = model.evaluate(X_test, y_test)
Epoch 1/50 596/596 [==============================] - 1s 857us/step - loss: 0.4914 - accuracy: 0.7718 Epoch 2/50 596/596 [==============================] - 1s 862us/step - loss: 0.4399 - accuracy: 0.8121 Epoch 3/50 596/596 [==============================] - 1s 863us/step - loss: 0.4400 - accuracy: 0.8171 Epoch 4/50 596/596 [==============================] - 1s 851us/step - loss: 0.4273 - accuracy: 0.8138 Epoch 5/50 596/596 [==============================] - 1s 844us/step - loss: 0.4189 - accuracy: 0.8289 Epoch 6/50 596/596 [==============================] - 1s 857us/step - loss: 0.4167 - accuracy: 0.8305 Epoch 7/50 596/596 [==============================] - 1s 863us/step - loss: 0.4153 - accuracy: 0.8188 Epoch 8/50 596/596 [==============================] - 1s 868us/step - loss: 0.4121 - accuracy: 0.8289 Epoch 9/50 596/596 [==============================] - 1s 857us/step - loss: 0.4205 - accuracy: 0.8087 Epoch 10/50 596/596 [==============================] - 1s 876us/step - loss: 0.4071 - accuracy: 0.8289 Epoch 11/50 596/596 [==============================] - 1s 904us/step - loss: 0.4063 - accuracy: 0.8255 Epoch 12/50 596/596 [==============================] - 1s 908us/step - loss: 0.4083 - accuracy: 0.8255 Epoch 13/50 596/596 [==============================] - 1s 872us/step - loss: 0.4091 - accuracy: 0.8221 Epoch 14/50 596/596 [==============================] - 1s 859us/step - loss: 0.4025 - accuracy: 0.8272 Epoch 15/50 596/596 [==============================] - 1s 856us/step - loss: 0.4057 - accuracy: 0.8372 Epoch 16/50 596/596 [==============================] - 1s 858us/step - loss: 0.4000 - accuracy: 0.8356 Epoch 17/50 596/596 [==============================] - 1s 858us/step - loss: 0.4008 - accuracy: 0.8289 Epoch 18/50 596/596 [==============================] - 1s 890us/step - loss: 0.4023 - accuracy: 0.8389 Epoch 19/50 596/596 [==============================] - 1s 853us/step - loss: 0.3947 - accuracy: 0.8423 Epoch 20/50 596/596 
[==============================] - 1s 862us/step - loss: 0.3960 - accuracy: 0.8289 Epoch 21/50 596/596 [==============================] - 1s 864us/step - loss: 0.3976 - accuracy: 0.8406 Epoch 22/50 596/596 [==============================] - 1s 877us/step - loss: 0.3962 - accuracy: 0.8389 Epoch 23/50 596/596 [==============================] - 1s 857us/step - loss: 0.3913 - accuracy: 0.8406 Epoch 24/50 596/596 [==============================] - 1s 858us/step - loss: 0.3946 - accuracy: 0.8406 Epoch 25/50 596/596 [==============================] - 1s 858us/step - loss: 0.3887 - accuracy: 0.8372 Epoch 26/50 596/596 [==============================] - 1s 858us/step - loss: 0.3825 - accuracy: 0.8456 Epoch 27/50 596/596 [==============================] - 1s 864us/step - loss: 0.3826 - accuracy: 0.8523 Epoch 28/50 596/596 [==============================] - 1s 857us/step - loss: 0.3837 - accuracy: 0.8440 Epoch 29/50 596/596 [==============================] - 1s 857us/step - loss: 0.3852 - accuracy: 0.8372 Epoch 30/50 596/596 [==============================] - 1s 865us/step - loss: 0.3838 - accuracy: 0.8440 Epoch 31/50 596/596 [==============================] - 1s 873us/step - loss: 0.3840 - accuracy: 0.8406 Epoch 32/50 596/596 [==============================] - 1s 874us/step - loss: 0.3789 - accuracy: 0.8406 Epoch 33/50 596/596 [==============================] - 1s 874us/step - loss: 0.3812 - accuracy: 0.8456 Epoch 34/50 596/596 [==============================] - 1s 857us/step - loss: 0.3764 - accuracy: 0.8456 Epoch 35/50 596/596 [==============================] - 1s 857us/step - loss: 0.3759 - accuracy: 0.8440 Epoch 36/50 596/596 [==============================] - 1s 857us/step - loss: 0.3781 - accuracy: 0.8423 Epoch 37/50 596/596 [==============================] - 1s 870us/step - loss: 0.3704 - accuracy: 0.8523 Epoch 38/50 596/596 [==============================] - 1s 875us/step - loss: 0.3725 - accuracy: 0.8507 Epoch 39/50 596/596 [==============================] - 1s 
873us/step - loss: 0.3645 - accuracy: 0.8440 Epoch 40/50 596/596 [==============================] - 1s 892us/step - loss: 0.3775 - accuracy: 0.8440 Epoch 41/50 596/596 [==============================] - 1s 891us/step - loss: 0.3723 - accuracy: 0.8490 Epoch 42/50 596/596 [==============================] - 1s 943us/step - loss: 0.3682 - accuracy: 0.8540 Epoch 43/50 596/596 [==============================] - 1s 932us/step - loss: 0.3642 - accuracy: 0.8523 Epoch 44/50 596/596 [==============================] - 1s 1ms/step - loss: 0.3664 - accuracy: 0.8440 Epoch 45/50 596/596 [==============================] - 1s 1ms/step - loss: 0.3604 - accuracy: 0.8456 Epoch 46/50 596/596 [==============================] - 1s 878us/step - loss: 0.3692 - accuracy: 0.8490 Epoch 47/50 596/596 [==============================] - 1s 883us/step - loss: 0.3586 - accuracy: 0.8523 Epoch 48/50 596/596 [==============================] - 1s 887us/step - loss: 0.3593 - accuracy: 0.8540 Epoch 49/50 596/596 [==============================] - 1s 877us/step - loss: 0.3519 - accuracy: 0.8591 Epoch 50/50 596/596 [==============================] - 1s 872us/step - loss: 0.3581 - accuracy: 0.8490 10/10 [==============================] - 0s 2ms/step - loss: 0.4679 - accuracy: 0.8136
# Report the held-out metrics computed by model.evaluate above
# (same output text and order as before).
for label, value in (('test_acc', test_acc), ('test_loss', test_loss)):
    print(label + ' -> ', value)
test_acc -> 0.8135592937469482 test_loss -> 0.46787524223327637
from sklearn.metrics import confusion_matrix

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 class labels.
# The original wrapped the predictions in a needless DataFrame and used
# .round(), whose round-half-to-even rule is an obscure way to spell a
# decision threshold; an explicit comparison is unambiguous.
prediction = (model.predict(X_test) > 0.5).astype(int).ravel()
confusion_matrix(y_test, prediction)
10/10 [==============================] - 0s 804us/step
array([[169, 20],
[ 35, 71]], dtype=int64)